Source. The Environmental Data Initiative (EDI) provides access to a wide range of datasets from the NSF Long Term Ecological Research (LTER) program. We will focus on datasets related to atmospheric deposition, impervious surface area, population density, and precipitation.
Methods. We will use the EDIutils package to search for and download relevant datasets, then process them for analysis. The datasets will be cleaned, filtered, and prepared for visualization.
Searching and Accessing HBES LTER Data from the Environmental Data Initiative repository
# Search EDI for data packages matching the keyword "hubbard". For each hit
# request the package id, title, DOI, author, organization, publication date
# and abstract fields.
query <- 'q=keyword:hubbard&fl=packageid,title,doi,author,organization,pubdate,abstract'
res <- search_data_packages(query)

# Turn the search results into an interactive gt table.
hbes_lter_gt <- res %>%
  gt() %>%
  tab_header(title = "HBES LTER Datasets") %>%
  fmt_auto(everything()) %>%
  # Show the DOI as a link labelled "DOI", then fold it into the title cell.
  fmt_url(
    columns = doi,
    label = "DOI",
    color = "darkolivegreen3",
    show_underline = FALSE
  ) %>%
  cols_merge(c(title, doi), pattern = "{1}<br>({2})") %>%
  # Stack authors, organizations (italic) and publication date in one cell.
  cols_merge(
    columns = c(authors, organizations, pubdate),
    pattern = html("{1}<br><i>{2}</i><br>({3})")
  ) %>%
  cols_label(authors = "Authors (Date)", title = "Dataset Title & DOI") %>%
  # Column labels: upper-case, wrapped in markdown bold markers, 12px font.
  cols_label_with(fn = function(lbl) {
    bold_upper <- str_replace_all(toupper(lbl), "^|$", "**")
    md(html(sprintf("<span style='font-size:12px;'>%s</span>", bold_upper)))
  }) %>%
  cols_align(columns = doi, align = "center") %>%
  cols_align(align = "auto") %>%
  cols_width(
    packageid ~ px(125),
    abstract ~ px(300),
    title ~ px(185),
    authors ~ px(185)
  ) %>%
  # 12px body text everywhere, with the abstract column one pixel smaller.
  tab_style(style = cell_text(size = px(12)), locations = cells_body()) %>%
  tab_style(
    style = cell_text(size = px(11)),
    locations = cells_body(columns = c(abstract))
  ) %>%
  tab_source_note(
    source_note = md("Datasets from the Environmental Data Initiative")
  ) %>%
  opt_table_font(font = "rounded-sans") %>%
  tab_options(
    ihtml.active = TRUE,
    ihtml.use_highlight = TRUE,
    ihtml.use_filters = TRUE,
    ihtml.use_sorting = TRUE,
    page.header.use_tbl_headings = TRUE,
    ihtml.use_search = TRUE,
    ihtml.use_compact_mode = TRUE
  )
# Display the table
hbes_lter_gt
Example of how to download data with the EDIutils package and then analyze and plot data from HBES LTER. The data come from a previously published study of the health and mycorrhizal colonization response of sugar maple (Acer saccharum) seedlings to calcium addition in Watershed 1 at the Hubbard Brook Experimental Forest.
# Start from an empty temporary directory: when it already exists, delete
# everything inside it; otherwise create it.
if (dir_exists(tempdir())) {
  file_delete(dir_ls(tempdir()))
} else {
  dir_create(tempdir())
}

# Download the data package archive into the temporary directory.
packageid <- "knb-lter-hbr.157.3"
read_data_package_archive(packageid, path = tempdir())
## Downloading: 8 kB Downloading: 8 kB Downloading: 8 kB Downloading: 8 kB Downloading: 16 kB Downloading: 16 kB Downloading: 24 kB Downloading: 24 kB Downloading: 24 kB Downloading: 24 kB Downloading: 38 kB Downloading: 38 kB Downloading: 38 kB Downloading: 38 kB
# List the data entities of the package and show them in an interactive
# gt table.
entities <- read_data_entity_names(packageid)
entities_gt <- entities %>%
  gt() %>%
  fmt_auto() %>%
  # Column labels: upper-case, wrapped in markdown bold markers, 15px font.
  cols_label_with(fn = function(lbl) {
    bold_upper <- str_replace_all(toupper(lbl), "^|$", "**")
    md(html(sprintf("<span style='font-size:15px;'>%s</span>", bold_upper)))
  }) %>%
  opt_table_font(font = "rounded-sans") %>%
  tab_style(style = cell_text(size = px(15)), locations = cells_body()) %>%
  tab_options(
    ihtml.active = TRUE,
    ihtml.use_highlight = TRUE,
    ihtml.use_filters = TRUE,
    ihtml.use_sorting = TRUE,
    page.header.use_tbl_headings = TRUE,
    ihtml.use_search = TRUE,
    ihtml.use_compact_mode = TRUE
  )
entities_gt
# Download filename.csv in raw bytes. Use the entityName and entityID as keys.
entityName <- "w1_acsa_seed_physical"
# Look up the id of the entity with that name, fetch the entity, and parse
# it as CSV into a data frame.
entityid <- entities[["entityId"]][entities[["entityName"]] == entityName]
raw <- read_data_entity(packageid, entityid)
data <- readr::read_csv(raw)
## Rows: 359 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Watershed, Elevation, Transect
## dbl (11): Year, Sample, StemLength, Leaf1Area, Leaf2Area, LeafDryMass, StemD...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the downloaded table as an interactive gt table (sorting and search
# enabled) with 10px text.
data_gt <- data %>%
  gt() %>%
  fmt_auto() %>%
  tab_header(title = "Downloaded Dataset") %>%
  # Column labels: upper-case, wrapped in markdown bold markers, 10px font.
  cols_label_with(fn = function(lbl) {
    bold_upper <- str_replace_all(toupper(lbl), "^|$", "**")
    md(html(sprintf("<span style='font-size:10px;'>%s</span>", bold_upper)))
  }) %>%
  opt_table_font(font = "rounded-sans") %>%
  tab_style(style = cell_text(size = px(10)), locations = cells_body()) %>%
  tab_options(
    ihtml.active = TRUE,
    ihtml.use_highlight = TRUE,
    ihtml.use_sorting = TRUE,
    page.header.use_tbl_headings = TRUE,
    ihtml.use_search = TRUE,
    ihtml.use_compact_mode = TRUE
  )
data_gt
# Clean/Wrangle Data and Prepare for Summary Statistics Analysis
# Clean the seedling table for analysis. The pipeline starts from `data`,
# the data frame read from the EDI entity earlier in this script; the
# previous starting point, `hbr_maples_raw`, is not defined anywhere in the
# visible script.
# Steps: snake_case the column names, drop the three root measurements,
# blank out negative leaf-area values, and convert the grouping columns to
# factors.
hbr_maples <- data %>%
  clean_names() %>%
  dplyr::select(-root_area, -root_length, -root_dry_mass) %>%
  mutate(
    # Any negative leaf area (e.g. the -999 missing-value code) becomes NA.
    leaf1area = replace(leaf1area, which(leaf1area < 0), NA),
    leaf2area = replace(leaf2area, which(leaf2area < 0), NA),
    corrected_leaf_area = replace(
      corrected_leaf_area,
      which(corrected_leaf_area < 0),
      NA
    ),
    # Grouping variables are categorical.
    watershed = as.factor(watershed),
    elevation = as.factor(elevation),
    transect = as.factor(transect),
    sample = as.factor(sample)
  )
# Stem-length summary (mean, median, standard deviation, count) for every
# year/watershed combination; rows without a stem length are dropped first.
maple_summary <- hbr_maples %>%
  drop_na(stem_length) %>%
  group_by(year, watershed) %>%
  dplyr::summarise(
    mean_length   = mean(stem_length, na.rm = TRUE),
    median_length = median(stem_length, na.rm = TRUE),
    sd_length     = sd(stem_length, na.rm = TRUE),
    n             = n()
  )
# Format the summary statistics as a gt table with 14px body text.
maple_summary_gt <- maple_summary %>%
  gt() %>%
  fmt_auto() %>%
  tab_header(title = "Maple Summary Statistics") %>%
  # Column labels: title-cased clean names, upper-cased, wrapped in markdown
  # bold markers, 16px font.
  cols_label_with(fn = function(lbl) {
    title_lbl <- janitor::make_clean_names(lbl, case = "title")
    bold_upper <- str_replace_all(toupper(title_lbl), "^|$", "**")
    md(html(sprintf("<span style='font-size:16px;'>%s</span>", bold_upper)))
  }) %>%
  opt_table_font(font = "rounded-sans") %>%
  tab_style(style = cell_text(size = px(14)), locations = cells_body())
maple_summary_gt
| Maple Summary Statistics | ||||
| WATERSHED | MEAN LENGTH | MEDIAN LENGTH | SD LENGTH | N |
|---|---|---|---|---|
| 2003 | ||||
| Reference | 80.985 | 79.85 | 13.939 | 120 |
| W1 | 87.886 | 86.15 | 14.342 | 120 |
| 2004 | ||||
| Reference | 85.881 | 85 | 15.586 | 59 |
| W1 | 97.517 | 95.5 | 13.83 | 60 |
# Colors for the figures: the 11-class "BrBG" (brown-green) ColorBrewer
# palette. Entries 2 and 9 form the two-level discrete scale used below.
palette_colors <- RColorBrewer::brewer.pal(n = 11, name = "BrBG")
plot_colors <- palette_colors[c(2, 9)]

# Figure: stem length by watershed, one panel per year. Jittered points
# (fixed seed, so point placement is reproducible) with an unfilled boxplot
# drawn on top.
ggplot(data = hbr_maples, aes(x = watershed, y = stem_length)) +
  geom_jitter(
    aes(color = watershed),
    alpha = 0.85, show.legend = TRUE, size = 0.75,
    position = position_jitter(width = 0.2, seed = 0)
  ) +
  geom_boxplot(
    aes(color = watershed),
    fill = NA, alpha = 0.75, width = 0.5, linewidth = 0.5
  ) +
  scale_color_manual(values = plot_colors) +
  labs(
    x = "Watershed",
    y = "Stem Length (mm)",
    title = "Stem Lengths of Sugar Maple Seedlings",
    subtitle = "Hubbard Brook LTER"
  ) +
  facet_wrap(~year) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "italic"),
    axis.title = element_text(face = "bold"),
    legend.position = "bottom",
    panel.grid.major.x = element_blank()
  )

# Figure: histogram of stem length, in a year-by-watershed grid of panels.
# `linewidth` sets the thin white bar outline; it was previously misspelled
# `lineiwdth`, which ggplot2 ignores as an unknown parameter.
ggplot(data = hbr_maples, aes(x = stem_length)) +
  geom_histogram(
    aes(fill = factor(year)),
    bins = 30, color = "white", linewidth = 0.05
  ) +
  scale_fill_manual(values = plot_colors) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "italic"),
    axis.title = element_text(face = "bold"),
    legend.position = "bottom"
  ) +
  labs(
    x = "Stem Length (mm)",
    y = "Frequency",
    title = "Distribution of Sugar Maple Seedling Stem Lengths",
    subtitle = "Hubbard Brook LTER"
  ) +
  facet_grid(year ~ watershed)

# Figure: stem dry mass against stem length, with points shaded by stem
# length along a gradient built from palette entries 7 to 11.
length_colors <- palette_colors[c(7:11)]
ggplot(hbr_maples) +
  geom_point(
    aes(color = stem_length, x = stem_length, y = stem_dry_mass),
    alpha = 0.6
  ) +
  scale_color_gradientn(colors = length_colors) +
  labs(
    x = "Stem Length (mm)",
    y = "Stem Dry Mass (g)",
    title = "Stem Dry Mass vs. Stem Length in Sugar Maple Seedlings",
    subtitle = "Hubbard Brook LTER"
  ) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "italic"),
    axis.title = element_text(face = "bold"),
    legend.position = "none",
    panel.grid.major.x = element_blank()
  )

# Figure: the same relationship, colored by year, one panel per watershed.
ggplot(hbr_maples) +
  geom_point(aes(x = stem_length, y = stem_dry_mass, color = factor(year))) +
  scale_color_manual(values = plot_colors) +
  labs(
    x = "Stem Length (mm)",
    y = "Stem Dry Mass (g)",
    title = "Stem Dry Mass vs. Stem Length in Sugar Maple Seedlings",
    subtitle = "Hubbard Brook LTER",
    color = "year"
  ) +
  facet_wrap(~watershed) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "italic"),
    axis.title = element_text(face = "bold"),
    legend.position = "bottom"
  )

# F test comparing the stem-length variances of the watersheds, 2004 only.
f_test_maple <- hbr_maples %>%
  filter(year == 2004) %>%
  var.test(stem_length ~ watershed, data = .)
# Statistical tests
# Tabulate the F test stored in `f_test_maple` (stem-length variances by
# watershed, 2004 only) instead of running the identical var.test() a
# second time.
f_test_maple %>%
  tidy() %>%
  gt() %>%
  fmt_auto() %>%
  tab_header(title = "Maple Leaf Variance Test Results") %>%
  # Column labels: title-cased clean names, upper-cased, wrapped in markdown
  # bold markers, 12px font.
  cols_label_with(fn = function(lbl) {
    title_lbl <- janitor::make_clean_names(lbl, case = "title")
    bold_upper <- str_replace_all(toupper(title_lbl), "^|$", "**")
    md(html(sprintf("<span style='font-size:12px;'>%s</span>", bold_upper)))
  }) %>%
  opt_table_font(font = "rounded-sans") %>%
  tab_style(style = cell_text(size = px(12)), locations = cells_body())
| Maple Leaf Variance Test Results | ||||||||
| ESTIMATE | NUM DF | DEN DF | STATISTIC | P VALUE | CONF LOW | CONF HIGH | METHOD | ALTERNATIVE |
|---|---|---|---|---|---|---|---|---|
| 1.27 | 58 | 59 | 1.27 | 0.363 | 0.757 | 2.132 | F test to compare two variances | two.sided |
# Two-sample t test (equal variances assumed) of stem length between the
# watersheds, 2004 only. The result is stored once and reused for the table
# below rather than recomputing the same test.
t_test_maple <- hbr_maples %>%
  filter(year == 2004) %>%
  t.test(stem_length ~ watershed, var.equal = TRUE, data = .)

# Tabulate the tidied test result as a gt table with 12px text.
t_test_maple %>%
  tidy() %>%
  gt() %>%
  fmt_auto() %>%
  tab_header(title = "Maple Leaf T-Test Results") %>%
  # Column labels: title-cased clean names, upper-cased, wrapped in markdown
  # bold markers, 12px font.
  cols_label_with(fn = function(lbl) {
    title_lbl <- janitor::make_clean_names(lbl, case = "title")
    bold_upper <- str_replace_all(toupper(title_lbl), "^|$", "**")
    md(html(sprintf("<span style='font-size:12px;'>%s</span>", bold_upper)))
  }) %>%
  opt_table_font(font = "rounded-sans") %>%
  tab_style(style = cell_text(size = px(12)), locations = cells_body())
| Maple Leaf T-Test Results | |||||||||
| ESTIMATE | ESTIMATE1 | ESTIMATE2 | STATISTIC | P VALUE | PARAMETER | CONF LOW | CONF HIGH | METHOD | ALTERNATIVE |
|---|---|---|---|---|---|---|---|---|---|
| −11.635 | 85.881 | 97.517 | −4.309 | 3.432 × 10−5 | 117 | −16.983 | −6.288 | Two Sample t-test | two.sided |
# Start from an empty temporary directory again: when it already exists,
# delete everything inside it; otherwise create it.
if (dir_exists(tempdir())) {
  file_delete(dir_ls(tempdir()))
} else {
  dir_create(tempdir())
}

# Download the data package archive into the temporary directory.
packageid <- "edi.1066.1"
read_data_package_archive(packageid, path = tempdir())
## Downloading: 8 kB Downloading: 8 kB Downloading: 16 kB Downloading: 16 kB Downloading: 24 kB Downloading: 24 kB Downloading: 33 kB Downloading: 33 kB Downloading: 33 kB Downloading: 33 kB Downloading: 33 kB Downloading: 33 kB Downloading: 41 kB Downloading: 41 kB Downloading: 41 kB Downloading: 41 kB
# List the data entities of the package and build (without displaying) an
# interactive gt table of them.
entities <- read_data_entity_names(packageid)
entities_gt <- entities %>%
  gt() %>%
  fmt_auto() %>%
  # Column labels: upper-case, wrapped in markdown bold markers, 15px font.
  cols_label_with(fn = function(lbl) {
    bold_upper <- str_replace_all(toupper(lbl), "^|$", "**")
    md(html(sprintf("<span style='font-size:15px;'>%s</span>", bold_upper)))
  }) %>%
  opt_table_font(font = "rounded-sans") %>%
  tab_style(style = cell_text(size = px(15)), locations = cells_body()) %>%
  tab_options(
    ihtml.active = TRUE,
    ihtml.use_highlight = TRUE,
    ihtml.use_filters = TRUE,
    ihtml.use_sorting = TRUE,
    page.header.use_tbl_headings = TRUE,
    ihtml.use_search = TRUE,
    ihtml.use_compact_mode = TRUE
  )
# entities_gt
# Fetch one entity of the package, selected by its entity name, and parse it
# as CSV into a data frame.
entityName <- "ILTER_Stream_Deposition_Nitrogen"
entityid <- entities[["entityId"]][entities[["entityName"]] == entityName]
raw <- read_data_entity(packageid, entityid)
data <- readr::read_csv(raw)

# Show the downloaded table as an interactive gt table (sorting and search
# enabled) with 10px text.
data_gt <- data %>%
  gt() %>%
  fmt_auto() %>%
  tab_header(title = "Downloaded Dataset") %>%
  # Column labels: upper-case, wrapped in markdown bold markers, 10px font.
  cols_label_with(fn = function(lbl) {
    bold_upper <- str_replace_all(toupper(lbl), "^|$", "**")
    md(html(sprintf("<span style='font-size:10px;'>%s</span>", bold_upper)))
  }) %>%
  opt_table_font(font = "rounded-sans") %>%
  tab_style(style = cell_text(size = px(10)), locations = cells_body()) %>%
  tab_options(
    ihtml.active = TRUE,
    ihtml.use_highlight = TRUE,
    ihtml.use_sorting = TRUE,
    page.header.use_tbl_headings = TRUE,
    ihtml.use_search = TRUE,
    ihtml.use_compact_mode = TRUE
  )
data_gt